#include "cell.h"
// #define max(a, b) ((a) > (b) ? (a):(b))
// #define min(a, b) ((a) < (b) ? (a):(b))
typedef struct integrate_param {
  cellgrid_t *grid;
  real dt, vscale;
  int need_rebuild;
} integrate_param_t;
#ifdef __sw_host__
#include <qthread.h>
extern void slave_initial_integrate_verlet_cpe(integrate_param_t*);
extern void slave_final_integrate_verlet_cpe(integrate_param_t*);
int initial_integrate_verlet_sw(cellgrid_t *grid, real dt, real vscale) {
  integrate_param_t pm;
  pm.grid = grid;
  pm.dt = dt;
  pm.vscale = vscale;
  pm.need_rebuild = 0;
  qthread_spawn(slave_initial_integrate_verlet_cpe, &pm);
  qthread_join();
  return pm.need_rebuild;
  // FOREACH_LOCAL_CELL(grid, ii, jj, kk, cell) {
  //   for (int i = 0; i < cell->natom; i++) {
  //     vec<real> dv;
  //     /* a[i] = (f[i] / mass[i]), dv = a[i] * dt * 0.5*/
  //     vecscale(dv, cell->f[i], cell->rmass[i] * dt * 0.5 / 48.88821291 / 48.88821291);
  //     /* v is v(t + 0.5dt)*/
  //     vecaddv(cell->v[i], cell->v[i], dv);
  //     /* x is x(t + dt)*/
  //     vecscaleaddv(cell->x[i], cell->x[i], cell->v[i], 1, dt);
  //   }
  // }
}
void final_integrate_verlet_sw(cellgrid_t *grid, real dt) {
  integrate_param_t pm;
  pm.grid = grid;
  pm.dt = dt;
  qthread_spawn(slave_final_integrate_verlet_cpe, &pm);
  qthread_join();
}
real get_kin_sw(cellgrid_t *grid) {
  real kin = 0;
  FOREACH_LOCAL_CELL(grid, ii, jj, kk, cell) {
    for (int i = 0; i < cell->natom; i ++) {
      real v2 = vecnorm2(cell->v[i]);
      kin += 0.5 * cell->mass[i] * v2;
    }
  }
  return kin;
}
#endif
#ifdef __sw_slave__
#include <qthread_slave.h>
#include "dma_macros_new.h"
#include "reg_reduce.h"
#include "swarch.h"
#include "cal.h"
void initial_integrate_verlet_cpe(integrate_param_t *pm) {
  dma_init();
  cellgrid_t lgrid;
  pe_get(pm->grid, &lgrid, sizeof(cellgrid_t));
  real dt = pm->dt;
  real vscale = pm->vscale;
  dma_syn();
  // if (!_MYID)
  int need_rebuild = 0;
  FOREACH_LOCAL_CELL_CPE_RR(&lgrid, ii, jj, kk, cell){
    // if ((getcid(&lgrid, ii, jj, kk) & 63) != _MYID) continue;
    // cal_locked_printf("%d %d %d %d %d\n", ii, jj, kk, getcid(&lgrid, ii, jj, kk), _MYID);
    cellmeta_t imeta;
    pe_get(&cell->basis, &imeta, sizeof(cellmeta_t));
    dma_syn();
    vec<real> x[CELL_CAP], f[CELL_CAP], v[CELL_CAP];
    real rmass[CELL_CAP];
    pe_get(cell->x, x, sizeof(vec<real>) * imeta.natom);
    pe_get(cell->f, f, sizeof(vec<real>) * imeta.natom);
    pe_get(cell->v, v, sizeof(vec<real>) * imeta.natom);
    pe_get(cell->rmass, rmass, sizeof(real) * imeta.natom);
    dma_syn();
    for (int i = 0; i < imeta.natom; i ++){
      vec<real> dv;
      vecscaleaddv(v[i], v[i], f[i], vscale, rmass[i] * dt * 0.5 / 48.88821291 / 48.88821291);
      vecscaleaddv(x[i], x[i], v[i], 1, dt);
      vec<real> delhi;
      vecsubv(delhi, lgrid.len, x[i]);
      vec<real> leave_lo, leave_hi;
      //x[i] + skin < 0 => atom leaves lower
      vecaddv(leave_lo, x[i], lgrid.skin);
      //len + skin - x[i] < 0 => atom leaves higher
      vecaddv(leave_hi, delhi, lgrid.skin);
      real leave_min = min(min3(leave_lo.x, leave_lo.y, leave_lo.z), min3(leave_hi.x, leave_hi.y, leave_hi.z));
      if (leave_min < 0)
        need_rebuild = 1;
    }
    pe_put(cell->x, x, sizeof(vec<real>) * imeta.natom);
    pe_put(cell->v, v, sizeof(vec<real>) * imeta.natom);
    dma_syn();
  }
  if (need_rebuild) {
    pm->need_rebuild = 1;
  }
}
void final_integrate_verlet_cpe(integrate_param_t *pm) {
  dma_init();
  cellgrid_t lgrid;
  pe_get(pm->grid, &lgrid, sizeof(cellgrid_t));
  real dt = pm->dt;
  dma_syn();
  
  FOREACH_LOCAL_CELL_CPE_RR(&lgrid, ii, jj, kk, cell){
    cellmeta_t imeta;
    pe_get(&cell->basis, &imeta, sizeof(cellmeta_t));
    dma_syn();
    vec<real> f[CELL_CAP], v[CELL_CAP];
    real rmass[CELL_CAP];
    pe_get(cell->f, f, sizeof(vec<real>) * imeta.natom);
    pe_get(cell->v, v, sizeof(vec<real>) * imeta.natom);
    pe_get(cell->rmass, rmass, sizeof(real) * imeta.natom);
    dma_syn();
    for (int i = 0; i < imeta.natom; i ++){
      vec<real> dv;
      vecscaleaddv(v[i], v[i], f[i], 1, rmass[i] * dt * 0.5 / 48.88821291 / 48.88821291);
    }
    pe_put(cell->v, v, sizeof(vec<real>) * imeta.natom);
    dma_syn();
  }
}

#endif
